Data cleaning


writeLines(capture.output(devtools::session_info()), "sessionInfo.txt")

  cleaned_data_w_age <- read_excel("data/20230424_nordsjoen_stepan.xlsx", skip = 1) %>%
    clean_names() %>% 
    select(species, year, alder_y, length_cm, fettinnhold_percent, lsi_percent, hel_vekt_g, lever_vekt_g, matches("w_w")) %>%
    drop_na(species, year, length_cm, fettinnhold_percent, lsi_percent) %>%
    mutate(across(.cols = -species, .fn = ~ as.numeric(.x))) %>% 
    # log transform response variables and covariate length
    mutate(across(.cols = matches("w_w"), .fn = ~ log10(as.numeric(.x) + 1))) %>%
    mutate(across(.cols = matches("length_cm"), .fn = ~ log10(as.numeric(.x)))) %>%
    mutate(across(.cols = matches("fettinnhold_percent|year|lsi|vekt_g|alder"), .fn = ~ as.numeric(.x))) %>%
    rename_with(
      ~ str_replace_all(.x, c(
        "w_w" = "ww",
        "^s" = "",
        "fettinnhold_percent" = "fat_pc",
        "lsi_percent" = "lsi_pc"
      )) %>%
        str_to_lower(),
      .cols = !matches("species")
    ) %>%
    # remove obs with no POP data
    filter(if_any(matches("ww"), ~ !is.na(.x))) %>%
    pivot_longer(cols = matches("ww"), names_to = "variable", values_to = "value") %>%
    # update two values found to be incorrectly entered
    mutate(
      hel_vekt_g = if_else(hel_vekt_g == 635 & lever_vekt_g == 151.9, 6135, hel_vekt_g),
      lsi_pc = if_else(hel_vekt_g == 6135 & lever_vekt_g == 151.9, lever_vekt_g / hel_vekt_g, lsi_pc),
      hel_vekt_g = if_else(hel_vekt_g == 1004 & lever_vekt_g == 780.0, 10040, hel_vekt_g),
      lsi_pc = if_else(hel_vekt_g == 10040 & lever_vekt_g == 780.0, lever_vekt_g / hel_vekt_g, lsi_pc),
    )


  raw_data <- read_excel("data/20230424_nordsjoen_stepan.xlsx", skip = 1) %>%
    clean_names() %>%
    select(species, year, length_cm, fettinnhold_percent, lsi_percent, hel_vekt_g, lever_vekt_g, matches("w_w")) %>%
    drop_na(species, year, length_cm, fettinnhold_percent, lsi_percent) %>%
    mutate(across(.cols = -species, .fn = ~ as.numeric(.x)))

  cleaned_data <- raw_data %>%
    # log transform response variables and covariate length
    mutate(across(.cols = matches("w_w"), .fn = ~ log10(as.numeric(.x) + 1))) %>%
    mutate(across(.cols = matches("length_cm"), .fn = ~ log10(as.numeric(.x)))) %>%
    mutate(across(.cols = matches("fettinnhold_percent|year|lsi|vekt_g"), .fn = ~ as.numeric(.x))) %>%
    rename_with(
      ~ str_replace_all(.x, c(
        "w_w" = "ww",
        "^s" = "",
        "fettinnhold_percent" = "fat_pc",
        "lsi_percent" = "lsi_pc"
      )) %>%
        str_to_lower(),
      .cols = !matches("species")
    ) %>%
    # remove obs with no POP data
    filter(if_any(matches("ww"), ~ !is.na(.x))) %>%
    pivot_longer(cols = matches("ww"), names_to = "variable", values_to = "value") %>%
    # update two values found to be incorrectly entered
    mutate(
      hel_vekt_g = if_else(hel_vekt_g == 635 & lever_vekt_g == 151.9, 6135, hel_vekt_g),
      lsi_pc = if_else(hel_vekt_g == 6135 & lever_vekt_g == 151.9, lever_vekt_g / hel_vekt_g, lsi_pc),
      hel_vekt_g = if_else(hel_vekt_g == 1004 & lever_vekt_g == 780.0, 10040, hel_vekt_g),
      lsi_pc = if_else(hel_vekt_g == 10040 & lever_vekt_g == 780.0, lever_vekt_g / hel_vekt_g, lsi_pc),
    )

Data description

cleaned_data %>%
  group_by(species, variable, year) %>%
  summarise(n_obs = n()) %>%
  ungroup() %>%
  select(-variable) %>%
  distinct(species, year, .keep_all = TRUE) %>% pivot_wider(names_from = "species", values_from = "n_obs") %>% 
  gt(
    caption = "Samples per year per species.",
    groupname_col = "species",
    rowname_col = "year"
  )
Table 1: Samples per year per species.
Cod Haddock Saithe
2005 50 50 50
2008 46 50 49
2011 51 49 NA
2013 50 50 48
2016 50 44 49
2019 9 41 50
2010 NA 50 50

Model selection

To investigate if there was a time trend for any contaminants in the three gadoid species, one identical model was chosen and fitted to each different dataset (3 species x 6 contaminants = 18). Potential confounders were fish age or proxies such as fish length and fish weight as well as liver weight, liver fat content and hepatosomatic or liver somatic index (LSI). It was known that effect of these variables will differ between species, owing to their distinct physiology and feeding habits. Hence, a model was fitted to each species as comparisons between different fish species were not a priority. Due to collinearity with LSI and fish length, weight of liver and fish was omitted. Fish age was omitted due to missingness of a substantial fraction of observations and collinearity with fish length. As such, a multiple linear model including explanatory variables year, fish length, fat percentage and LSI was chosen. Two outliers were identified in LSI. Upon further investigation, it was determined that these values were the result of typographic errors during data collection. These errors were corrected during the data cleaning. Additional 3 cod samples in 2019 missed values for fat percentage and were not included for the statistical analyses.

Model fitting

# linear regression model
pop_lm <- function(in_dat) {
  lm(
    data = in_dat, formula = value ~ year + length_cm + fat_pc + lsi_pc,
    na.action = na.exclude
  )
}

# conditional partial residuals manually calculated
p_man <- cleaned_data %>%
  ungroup() %>%
  group_by(species, variable) %>%
  drop_na(value) %>%
  nest() %>%
  # apply model and calculate medians of covariates
  mutate(
    model = map(data, pop_lm),
    med_length_cm = map(
      data,
      ~ median(.x$length_cm, na.rm = TRUE)
    ) %>% as.numeric(),
    med_fat_pc = map(
      data,
      ~ median(.x$fat_pc, na.rm = TRUE)
    ) %>% as.numeric(),
    med_lsi_pc = map(
      data,
      ~ median(.x$lsi_pc, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_length_cm = map(
      data,
      ~ mean(.x$length_cm, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_fat_pc = map(
      data,
      ~ mean(.x$fat_pc, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_lsi_pc = map(
      data,
      ~ mean(.x$lsi_pc, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_year = map(
      data,
      ~ mean(.x$year, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_value = map(
      data,
      ~ mean(.x$value, na.rm = TRUE)
    ) %>% as.numeric()
  ) %>%
  mutate(
    glance = map(model, broom::glance),
    year_pval = map(model, ~ broom::tidy(.x)[[2, 5]] %>% as.numeric()),
    year_estimate = map(model, ~ broom::tidy(.x)[[2, 2]] %>% as.numeric() %>% {10^. - 1}),
    year_estimate_nolog = map(model, ~ broom::tidy(.x)[[2, 2]] %>% as.numeric()),
    # extracting model coefficients for partial residual calculation
    tidy_coeff = map(model, ~ broom::tidy(.x) %>%
      select(term, estimate) %>%
      pivot_wider(names_from = "term", values_from = "estimate") %>%
      clean_names() %>%
      rename_with(.cols = everything(), .fn = ~ paste0(.x, "_coeff"))),
    # tidy gets summary variables of the model coefficients
    tidy = map(model, ~ broom::tidy(.x)),
    # augment gets per observation variables (e.g. residuals)
    augment = map2(model, data, ~ broom::augment(.x, .y)),
    partial_resids = map(model, ~ residuals(.x, type = "partial") %>%
      as_tibble() %>%
      rename_with(.cols = everything(), .fn = ~ paste0(.x, "_partial_resid"))),
    normal_resids = map(model, ~ residuals(.x) %>%
      as_tibble() %>%
      rename_with(.cols = everything(), .fn = ~ paste0(.x, "_normal_resid")))
  ) %>%
  # add package-generated plots for an extra check
  mutate(
    visreg = map(
      model,
      ~ visreg(.x, "year", gg = TRUE, type = "conditional")
    ),
    effect_plot = map(
      model,
      ~ effect_plot(.x, pred = year, interval = TRUE, partial.residuals = TRUE)
    ),
    variable = toupper(variable) %>% str_replace_all("_WW", ""),
    
  ) %>%
  # vifs to check for collinearity
  mutate(
    vifs = map(model, ~ car::vif(.x) %>% as_tibble())
    )

Model validation

Linearity


#Residuals vs fitted

p_man %>% unnest(augment) %>%
  ggplot(aes(.fitted, .resid)) +
    geom_point(size = 0.2) + 
    geom_hline(yintercept = 0, color = "red") +
  geom_smooth(method = "loess", formula = y ~ x, se = T, span = 2)+
    facet_grid(species~variable, scales = "free")

Some non-linear patterns observed e.g for HCB in haddock and cod, yet patterns are not systematic across species and contaminant. Thus, the current model was retained.


#Residuals vs each covariate

p_man %>% unnest(augment) %>%
  ggplot(aes(length_cm, .resid)) +
    geom_point(size = 0.2) + 
      geom_hline(yintercept = 0, color = "red") +
  geom_smooth(method = "loess", formula = y ~ x, se = T, span = 2)+
    facet_grid(species~variable, scales = "free")


p_man %>% unnest(augment) %>%
  ggplot(aes(fat_pc, .resid)) +
    geom_point(size = 0.2) + 
      geom_hline(yintercept = 0, color = "red") +
  geom_smooth(method = "loess", formula = y ~ x, se = T, span = 2)+
    facet_grid(species~variable, scales = "free")


p_man %>% unnest(augment) %>%
  ggplot(aes(lsi_pc, .resid)) +
    geom_point(size = 0.2) + 
    geom_hline(yintercept = 0, color = "red") +
  geom_smooth(method = "loess", formula = y ~ x, se = T, span = 2)+
    facet_grid(species~variable, scales = "free")

Normality

QQ-plot of response

ggplot(p_man %>% unnest(augment), aes(sample = value)) +
  stat_qq(size = 0.2) +
  stat_qq_line(color = "red", size = 0.4) +
  facet_grid(species ~ variable, scales = "free")

Distribution of response

ggplot(p_man %>% unnest(augment), aes(value)) +
    geom_histogram(aes(y=..density..), color='gray50',
        alpha=0.2, position = "identity")+
    geom_density(alpha=0.2, fill = "red")+
  facet_grid(species ~ variable, scales = "free")

Deviation from normality is observed and p-values should be interpreted with caution. Bimodality observed for HCH could not be linked to sampling location.

Homoskedasticity

Scale-location plot

p_man %>%
  unnest(augment) %>%
  ggplot(aes(x = .fitted, y = sqrt(abs(.std.resid)))) +
  geom_point(size = 0.2) +
  geom_smooth(method = "loess", formula = y ~ x, span = 2, se = T, size = 0.5) +
  geom_smooth(method = "lm", formula = y ~ 1, se = FALSE, color = "red", size = 0.5, linetype="dashed", span = 2)+
  facet_grid(species ~ variable, scales = "free")

No clear systematic patterns in the variance versus fitted value is observed, most showing relatively homoskedastic behavior.

Collinearity

# year
cleaned_data %>%
  select(-variable, -value, -hel_vekt_g, -lever_vekt_g) %>%
  mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
  pivot_longer(cols = -c("species", "year"), names_to = "variable", values_to = "value") %>%
  ggplot(aes(as_factor(year), value)) +
  geom_point() + geom_boxplot()+
    stat_summary(fun = "mean", geom = "point", color = "red", size = 3, shape = 3)+
  theme(
      axis.text.x = element_text(angle = 90, hjust = 1)
    )+
  ggh4x::facet_grid2(species ~ variable, scales = "free_y", independent = "y")


# # fat_pc
# cleaned_data %>%
#   select(-variable, -value, -hel_vekt_g, -lever_vekt_g) %>%
#   mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
#   pivot_longer(cols = -c("species", "year"), names_to = "variable", values_to = "value") %>%
#   ggplot(aes(as_factor(year), value)) +
#   geom_point() + geom_boxplot()+
#   theme(
#       axis.text.x = element_text(angle = 90, hjust = 1)
#     )+
#   ggh4x::facet_grid2(species ~ variable, scales = "free_y", independent = "y")
# 
# # lsi_pc
# cleaned_data %>%
#   select(-variable, -value, -hel_vekt_g, -lever_vekt_g) %>%
#   mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
#   pivot_longer(cols = -c("species", "year"), names_to = "variable", values_to = "value") %>%
#   ggplot(aes(as_factor(year), value)) +
#   geom_point() + geom_boxplot()+
#   theme(
#       axis.text.x = element_text(angle = 90, hjust = 1)
#     )+
#   ggh4x::facet_grid2(species ~ variable, scales = "free_y", independent = "y")
# 
# # length_cm
# cleaned_data %>%
#   select(-variable, -value, -hel_vekt_g, -lever_vekt_g) %>%
#   mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
#   pivot_longer(cols = -c("species", "year"), names_to = "variable", values_to = "value") %>%
#   ggplot(aes(as_factor(year), value)) +
#   geom_point() + geom_boxplot()+
#   theme(
#       axis.text.x = element_text(angle = 90, hjust = 1)
#     )+
#   ggh4x::facet_grid2(species ~ variable, scales = "free_y", independent = "y")

VIFs were determined to be below 3 for each dependent variable. However, some collinearity e.g. a decrease in LSI for haddock with year due to fish weight/size increase is observed.

Outliers

# cleaned_data %>%
#   select(-variable, -value) %>%
#   mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
#   pivot_longer(cols = -"species", names_to = "variable", values_to = "value") %>%
#   ggplot(aes(value)) +
#   geom_histogram(bins = 50) +
#   facet_grid(species ~ variable, scales = "free")+ theme(
#       axis.text.x = element_text(angle = 90, hjust = 1)
#     )
# 
# cleaned_data %>%
#   select(-variable, -value) %>%
#   mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
#   pivot_longer(cols = -"species", names_to = "variable", values_to = "value") %>%
#   ggplot(aes(value)) + geom_boxplot() + facet_grid(species ~ variable, scales = "free")+ theme(
#       axis.text.x = element_text(angle = 90, hjust = 1)
#     )

Two outliers were identified in LSI. Upon further investigation, it was determined that these values were the result of typographic errors during data collection. These errors were corrected during the data cleaning above.

PCB6 and HCB levels

  pcb_hcb <- read_excel("data/20230907_Datasettet med PCB6.xlsx", skip = 1) %>%
    clean_names() %>% 
    select(species, year, alder_y, length_cm, fettinnhold_percent, lsi_percent, hel_vekt_g, lever_vekt_g, matches("w_w|pcb")) %>%
    drop_na(species, year, length_cm, fettinnhold_percent, lsi_percent) %>%
    mutate(across(.cols = -species, .fn = ~ as.numeric(.x))) %>% 
    mutate(across(.cols = matches("length_cm"), .fn = ~ as.numeric(.x))) %>%
    mutate(across(.cols = matches("fettinnhold_percent|year|lsi|vekt_g|alder|pcb"), .fn = ~ as.numeric(.x))) %>%
    rename_with(
      ~ str_replace_all(.x, c(
        "w_w" = "ww",
        "^s" = "",
        "fettinnhold_percent" = "fat_pc",
        "lsi_percent" = "lsi_pc"
      )) %>%
        str_to_lower(),
      .cols = !matches("species")
    ) %>%
    # remove obs with no POP data
    filter(if_any(matches("ww|pcb"), ~ !is.na(.x))) %>%
    pivot_longer(cols = matches("ww|pcb"), names_to = "variable", values_to = "value") %>%
    # update two values found to be incorrectly entered
    mutate(
      hel_vekt_g = if_else(hel_vekt_g == 635 & lever_vekt_g == 151.9, 6135, hel_vekt_g),
      lsi_pc = if_else(hel_vekt_g == 6135 & lever_vekt_g == 151.9, lever_vekt_g / hel_vekt_g, lsi_pc),
      hel_vekt_g = if_else(hel_vekt_g == 1004 & lever_vekt_g == 780.0, 10040, hel_vekt_g),
      lsi_pc = if_else(hel_vekt_g == 10040 & lever_vekt_g == 780.0, lever_vekt_g / hel_vekt_g, lsi_pc),
    )


pcb_hcb %>%
  filter(variable %in% c("pcb6", "hcb_ww")) %>% ungroup() %>% 
  mutate(hline_leg = if_else(variable == "pcb6", 200, 10) %>%
           as.numeric()) %>% 
  # unnest(data) %>%
  # mutate(value = 10^value - 1) %>%
  filter(value < 1300) %>%
  ggplot(aes(year, value, group = year)) +
  geom_jitter(width = 0.35, size = 0.3) +
  geom_boxplot(outlier.shape = NA, width = 1.65) +
  scale_x_continuous(breaks = c(2019, 2016, 2013, 2011, 2008, 2005, 2010)) +
  stat_summary(fun.y = mean, geom = "point", shape = 3, size = 1.8, color = "red", fill = "red") +
  geom_hline(na.rm = TRUE, aes(yintercept = hline_leg), color = "red", linetype = "dashed") +
    scale_y_continuous(breaks = pretty_breaks(n = 6))+

  ylab("Concentration [\u00b5g/kg]") +
  xlab("Year") +
  facet_grid(variable ~ species, scales = "free")


ggsave("hcb_pcb_reg.svg", width = 6, height = 3.5)

Bivariate correlations

LSI DDTs and HCHs in Haddock

p_man %>% 
  filter(variable %in% c("DDT", "HCH"),
         species == "Haddock") %>% unnest(data) %>% 
  mutate(value = 10^value - 1) %>%
  ggplot(aes(lsi_pc, value))+
  geom_point(size = 0.5) + 
  geom_smooth(method = "lm", color = "red", size = 0.75) + 
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))+
  facet_wrap(~variable, scales = "free") + 
    ylab("Concentration [\u00b5g/kg]") +
  xlab("LSI [%]")

ggsave("ddt_hch_bivar.svg", width = 9, height = 3)

PCB7 and DDT versus age

cleaned_data_w_age %>%
  mutate(
    variable = toupper(variable) %>% str_replace_all("_WW", "")
  ) %>% 
  filter(variable %in% c("DDT", "PCB7"),
         species == "Cod") %>% 
  mutate(value = 10^value - 1) %>%
  ggplot(aes(alder_y, value))+
  geom_point(size = 0.5) + 
  geom_smooth(method = "lm", color = "red", size = 0.75) + 
  scale_x_continuous(breaks = pretty_breaks(n = 5))+
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))+
  facet_wrap(~factor(variable, levels = c(
    "PCB7",
    "DDT",
    "HCH",
    "HCB",
    "TNC",
    "PBDE"
  )), scales = "free",
             ) + 
    ylab("Concentration [\u00b5g/kg]") +
  xlab("Age [years]")

ggsave("pcb_ddt_age.svg", width = 9, height = 3)
  

Partial residuals

Background

Assuming the least square estimate of the covariates \(X_{1}\), \(X_{2}\) and \(X_{3}\) and \(y_i\) and \(\epsilon_i\) is the response and residual for the i-th point:

\[ y_i = f_1(x_{i1}) + f_2(x_{i2}) + f_3(x_{i3}) + \epsilon_i. \]

The partial residual of \(X_3\) for the i-th observation is then generally defined as (Sohil et al., 2021) (Larsen and McCleary, 1972):

\[ r_i = y_i-f_1\left(x_{i 1}\right)-f_2\left(x_{i 2}\right) = \epsilon_i + f_3(x_{i3}) , \]

where in the present case

\[ y_i = \hat b_0 + \hat b_{year}(year_{i})+ \hat b_{length}(length_{i})+\hat b_{fat pc}(fatpc_{i})+ \hat b_{lsi pc}(lsi pc_{i})+ \epsilon_i \] or, similarly

\[ y_i = \bar y + \hat b_{year}\cdot (x_{year}-\bar x_{year}) + \hat b_{length}\cdot (x_{length}-\bar x_{length}) + \hat b_{fatpc}\cdot (x_{fatpc}-\bar x_{fatpc}) + \hat b_{lsipc}\cdot (x_{lsipc}-\bar x_{lsipc}) + \epsilon_i \]

Conditional partial residual plots

p_man %>%
  unnest(tidy_coeff, partial_resids, data, normal_resids, augment) %>%
  ungroup() %>%
  ggplot(aes(year, (value_normal_resid + year * year_coeff + intercept_coeff + med_length_cm * length_cm_coeff + med_fat_pc * fat_pc_coeff + med_lsi_pc * lsi_pc_coeff))) +
  geom_jitter(size = 0.3, na.rm = T, height = 0, width = 0.25) +
  # geom_point(aes(year, value), size = 0.2, color = "darkgrey", position = position_jitternudge(width = 0.2, height = 0,
  #                                    seed = 123, x = 0.5,
  #                                    direction = "as.is",
  #                                    nudge.from = "jittered"))+
  ### TEST COND PART RESID
  # geom_point(aes(year, value_normal_resid + mean_value +
  #                year_coeff*(year - mean_year) + length_cm_coeff*(med_length_cm - mean_length_cm) + fat_pc_coeff*(med_fat_pc - mean_fat_pc)), size = 0.2, color = "orange", position = position_jitternudge(width = 0.2, height = 0,
  #                                    seed = 123, x = -0.5,
  #                                    direction = "as.is",
  #                                    nudge.from = "jittered"))+
  ### TEST COND PART RESID END
  stat_smooth(method = "lm", na.rm = TRUE, se = FALSE, color = "red", size = 0.5) +
  # conditional coloring percentage change
  geom_text(
   data = p_man %>% group_by(species, variable) %>%
     distinct(year_estimate, .keep_all = TRUE),
   color =
     case_when(
       p_man %>% group_by(species, variable) %>%
         distinct(year_estimate) %>% pull(year_estimate) > 0 &
         p_man %>%
           group_by(species, variable) %>%
           distinct(year_pval) %>%
           pull(year_pval) < 0.05 ~ "red",
       p_man %>% group_by(species, variable) %>%
         distinct(year_estimate) %>% pull(year_estimate) < 0 &
         p_man %>%
           group_by(species, variable) %>%
           distinct(year_pval) %>%
           pull(year_pval) < 0.05 ~ "green",
       TRUE ~ "black"
     ),
   fontface = "bold",
   x = -Inf, y = Inf, hjust = -0.075, vjust = 1.5,
   size = p_man %>% group_by(species, variable) %>%
         distinct(year_estimate) %>% pull(year_estimate) %>% as.numeric() %>% abs()*150/14 + 2.3,
   # size = 3.7,
   aes(label = paste0(round(year_estimate %>% as.numeric() * 100, digits = 1) %>% number(style_positive = "plus", accuracy = 0.1), "%"))
 )+
  # p-values
    geom_text(
    data = p_man %>% group_by(species, variable) %>%
      distinct(year_pval, .keep_all = TRUE), color = "grey30",
    x = 2004.9, y = 0.1, hjust = 0, vjust = 0.6, size = 2.5,
    aes(label = paste0("p = ", round(year_pval %>% as.numeric(), digits = 3) %>% format(nsmall = 2)))
  ) +
  
  #   geom_text(
  #   data = p_man %>% unnest(tidy) %>% group_by(species, variable) %>%
  #     distinct(year_coeff, .keep_all = TRUE), color = "red",
  #   x = -Inf, y = Inf, hjust = -0.1, vjust = 3, size = 2.5,
  #   aes(label = paste0("slope = ", round(year_coeff %>% as.numeric(), digits = 4)))
  # ) +
  scale_x_continuous(breaks = c(2019, 2016, 2013, 2011, 2008, 2005, 2010)) +
  scale_y_continuous(breaks = c(0, 0.5, 1, 1.5, 2, 2.5, 3), limits = c(0, 3.2)) +
  ylab("log(Concentration + 1) [\u00b5g/kg]") +
  xlab("Year | (Length, Fat and LSI)") +

    facet_grid(species ~ factor(variable, levels = c(
    "PCB7",
    "DDT",
    "HCH",
    "HCB",
    "TNC",
    "PBDE"
  )))


ggsave("conditional_pr_stepan_data.svg", width = 11, height = 6)

# comditional jtools and visreg
# p_man$visreg[[18]] + scale_y_continuous(breaks = pretty_breaks(n = 10))
# p_man$effect_plot[[18]]
# patchwork::wrap_plots(p_man$visreg)

Strength of decrease

Table


p_man %>% clean_names() %>%
  select(species, variable, tidy) %>%
  unnest(tidy) %>%
  filter(term == "year") %>%
  select(species, variable, p.value, estimate) %>%
  mutate(estimate = 10^estimate - 1) %>%
  gt(caption = "Percentage decrease per year.",
     groupname_col = "variable",
     rowname_col = "species") %>% 
  tab_style(
    location = cells_body(
      columns = estimate,
      rows = estimate > 0 & p.value < 0.05
    ),
    style = list(cell_text(color = "red"))
  ) %>% 
  
    tab_style(
    location = cells_body(
      columns = estimate,
      rows = estimate < 0 & p.value < 0.05
    ),
    style = list(cell_text(color = "green"))
  ) %>% 
  fmt_number(everything(), decimals = 3) %>% 
  fmt_percent(estimate, decimals = 1) %>% 
  tab_footnote("Estimated annual percentage change obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase.")
Table 2: Percentage decrease per year.
p.value estimate
PCB7
Cod 0.039 −1.5%
Haddock 0.000 −6.8%
Saithe 0.000 −3.0%
DDT
Cod 0.055 −1.6%
Haddock 0.000 −8.6%
Saithe 0.000 −4.5%
HCH
Cod 0.017 −0.9%
Haddock 0.000 −2.7%
Saithe 0.000 −2.8%
HCB
Cod 0.020 1.1%
Haddock 0.000 1.8%
Saithe 0.035 −0.8%
TNC
Cod 0.394 −0.7%
Haddock 0.000 −4.9%
Saithe 0.000 −3.5%
PBDE
Cod 0.000 −6.8%
Haddock 0.000 −12.9%
Saithe 0.000 −15.9%
Estimated annual percentage change obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase.
p_man %>% clean_names() %>%
  select(species, variable, tidy) %>%
  unnest(tidy) %>% 
  filter(term == "lsi_pc") %>%
  select(species, variable, p.value, estimate) %>%
  mutate(estimate = 10^estimate - 1) %>%
  gt(caption = "Percentage change per percentage increase in lsi",
     groupname_col = "variable",
     rowname_col = "species") %>% 
  tab_style(
    location = cells_body(
      columns = estimate,
      rows = estimate > 0 & p.value < 0.05
    ),
    style = list(cell_text(color = "red"))
  ) %>% 
  
    tab_style(
    location = cells_body(
      columns = estimate,
      rows = estimate < 0 & p.value < 0.05
    ),
    style = list(cell_text(color = "green"))
  ) %>% 
  fmt_number(everything(), decimals = 3) %>% 
  fmt_percent(estimate, decimals = 1) %>% 
  tab_footnote("Estimated percentage change per percent LSI increase obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase.")
Table 3: Percentage change per percentage increase in lsi
p.value estimate
PCB7
Cod 0.000 −9.4%
Haddock 0.000 −19.2%
Saithe 0.019 −2.7%
DDT
Cod 0.000 −10.6%
Haddock 0.000 −16.7%
Saithe 0.070 −2.2%
HCH
Cod 0.206 1.5%
Haddock 0.000 3.6%
Saithe 0.544 −0.2%
HCB
Cod 0.000 −8.4%
Haddock 0.000 −5.3%
Saithe 0.000 −3.4%
TNC
Cod 0.000 −11.0%
Haddock 0.000 −14.1%
Saithe 0.007 −3.1%
PBDE
Cod 0.001 −11.1%
Haddock 0.000 14.3%
Saithe 0.002 −3.6%
Estimated percentage change per percent LSI increase obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase.

p_man %>% clean_names() %>%
  select(species, variable, tidy) %>%
  unnest(tidy) %>% 
  filter(term == "fat_pc") %>%
  select(species, variable, p.value, estimate) %>%
  mutate(estimate = 10^estimate - 1) %>%
  gt(caption = "Percentage change per percentage increase in fat",
     groupname_col = "variable",
     rowname_col = "species") %>% 
  tab_style(
    location = cells_body(
      columns = estimate,
      rows = estimate > 0 & p.value < 0.05
    ),
    style = list(cell_text(color = "red"))
  ) %>% 
  
    tab_style(
    location = cells_body(
      columns = estimate,
      rows = estimate < 0 & p.value < 0.05
    ),
    style = list(cell_text(color = "green"))
  ) %>% 
  fmt_number(everything(), decimals = 3) %>% 
  fmt_percent(estimate, decimals = 1) %>% 
  tab_footnote("Estimated percentage change per percent fat increase obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase.")
Table 3: Percentage change per percentage increase in fat
p.value estimate
PCB7
Cod 0.000 0.8%
Haddock 0.170 0.4%
Saithe 0.048 −0.5%
DDT
Cod 0.000 1.3%
Haddock 0.000 1.7%
Saithe 0.041 −0.5%
HCH
Cod 0.000 1.5%
Haddock 0.000 0.9%
Saithe 0.000 1.1%
HCB
Cod 0.000 2.8%
Haddock 0.000 1.9%
Saithe 0.000 1.4%
TNC
Cod 0.000 1.4%
Haddock 0.000 1.3%
Saithe 0.047 −0.5%
PBDE
Cod 0.000 1.2%
Haddock 0.741 −0.1%
Saithe 0.959 −0.0%
Estimated percentage change per percent fat increase obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase.

Visualization without log-transformation


cleaned_data_notrans <- raw_data %>%
    # log transform covariate length
    mutate(across(.cols = matches("w_w"), .fn = ~ as.numeric(.x))) %>%
    mutate(across(.cols = matches("length_cm"), .fn = ~ log10(as.numeric(.x)))) %>%
    mutate(across(.cols = matches("fettinnhold_percent|year|lsi|vekt_g"), .fn = ~ as.numeric(.x))) %>%
    rename_with(
      ~ str_replace_all(.x, c(
        "w_w" = "ww",
        "^s" = "",
        "fettinnhold_percent" = "fat_pc",
        "lsi_percent" = "lsi_pc"
      )) %>%
        str_to_lower(),
      .cols = !matches("species")
    ) %>%
    # remove obs with no POP data
    filter(if_any(matches("ww"), ~ !is.na(.x))) %>%
    pivot_longer(cols = matches("ww"), names_to = "variable", values_to = "value") %>%
    # update two values found to be incorrectly entered
    mutate(
      hel_vekt_g = if_else(hel_vekt_g == 635 & lever_vekt_g == 151.9, 6135, hel_vekt_g),
      lsi_pc = if_else(hel_vekt_g == 6135 & lever_vekt_g == 151.9, lever_vekt_g / hel_vekt_g, lsi_pc),
      hel_vekt_g = if_else(hel_vekt_g == 1004 & lever_vekt_g == 780.0, 10040, hel_vekt_g),
      lsi_pc = if_else(hel_vekt_g == 10040 & lever_vekt_g == 780.0, lever_vekt_g / hel_vekt_g, lsi_pc),
    )

p_man_notrans <- cleaned_data_notrans %>%
  ungroup() %>%
  group_by(species, variable) %>%
  drop_na(value) %>%
  nest() %>%
  # apply model and calculate medians of covariates
  mutate(
    model = map(data, pop_lm),
    med_length_cm = map(
      data,
      ~ median(.x$length_cm, na.rm = TRUE)
    ) %>% as.numeric(),
    med_fat_pc = map(
      data,
      ~ median(.x$fat_pc, na.rm = TRUE)
    ) %>% as.numeric(),
    med_lsi_pc = map(
      data,
      ~ median(.x$lsi_pc, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_length_cm = map(
      data,
      ~ mean(.x$length_cm, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_fat_pc = map(
      data,
      ~ mean(.x$fat_pc, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_lsi_pc = map(
      data,
      ~ mean(.x$lsi_pc, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_year = map(
      data,
      ~ mean(.x$year, na.rm = TRUE)
    ) %>% as.numeric(),
    mean_value = map(
      data,
      ~ mean(.x$value, na.rm = TRUE)
    ) %>% as.numeric()
  ) %>%
  mutate(
    glance = map(model, broom::glance),
    year_pval = map(model, ~ broom::tidy(.x)[[2, 5]] %>% as.numeric()),
    year_estimate = map(model, ~ broom::tidy(.x)[[2, 2]] %>% as.numeric() %>% {10^. - 1}),
    year_estimate_nolog = map(model, ~ broom::tidy(.x)[[2, 2]] %>% as.numeric()),
    # extracting model coefficients for partial residual calculation
    tidy_coeff = map(model, ~ broom::tidy(.x) %>%
      select(term, estimate) %>%
      pivot_wider(names_from = "term", values_from = "estimate") %>%
      clean_names() %>%
      rename_with(.cols = everything(), .fn = ~ paste0(.x, "_coeff"))),
    # tidy gets summary variables of the model coefficients
    tidy = map(model, ~ broom::tidy(.x)),
    # augment gets per observation variables (e.g. residuals)
    augment = map2(model, data, ~ broom::augment(.x, .y)),
    partial_resids = map(model, ~ residuals(.x, type = "partial") %>%
      as_tibble() %>%
      rename_with(.cols = everything(), .fn = ~ paste0(.x, "_partial_resid"))),
    normal_resids = map(model, ~ residuals(.x) %>%
      as_tibble() %>%
      rename_with(.cols = everything(), .fn = ~ paste0(.x, "_normal_resid")))
  ) %>%
  # add package-generated plots for an extra check
  mutate(
    visreg = map(
      model,
      ~ visreg(.x, "year", gg = TRUE, type = "conditional")
    ),
    effect_plot = map(
      model,
      ~ effect_plot(.x, pred = year, interval = TRUE, partial.residuals = TRUE)
    ),
    variable = toupper(variable) %>% str_replace_all("_WW", ""),
    
  ) %>%
  # vifs to check for collinearity
  mutate(
    vifs = map(model, ~ car::vif(.x) %>% as_tibble())
    )

p_man_notrans %>%
  unnest(tidy_coeff, partial_resids, data, normal_resids, augment) %>%
  ungroup() %>%
  ggplot(aes(year, (value_normal_resid + year * year_coeff + intercept_coeff + med_length_cm * length_cm_coeff + med_fat_pc * fat_pc_coeff + med_lsi_pc * lsi_pc_coeff))) +
  geom_jitter(size = 0.3, na.rm = T, height = 0, width = 0.25) +
  # geom_point(aes(year, value), size = 0.2, color = "darkgrey", position = position_jitternudge(width = 0.2, height = 0,
  #                                    seed = 123, x = 0.5,
  #                                    direction = "as.is",
  #                                    nudge.from = "jittered"))+
  ### TEST COND PART RESID
  # geom_point(aes(year, value_normal_resid + mean_value +
  #                year_coeff*(year - mean_year) + length_cm_coeff*(med_length_cm - mean_length_cm) + fat_pc_coeff*(med_fat_pc - mean_fat_pc)), size = 0.2, color = "orange", position = position_jitternudge(width = 0.2, height = 0,
  #                                    seed = 123, x = -0.5,
  #                                    direction = "as.is",
  #                                    nudge.from = "jittered"))+
  ### TEST COND PART RESID END
  geom_smooth(method = "lm", color = "orange")+
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.key = element_blank(),
    strip.background = element_rect(colour="black", fill="white"),
    strip.text.x = element_text(face = "bold"),
    strip.text.y = element_text(face = "bold")
  ) +
  scale_x_continuous(breaks = c(2019, 2016, 2013, 2011, 2008, 2005, 2010)) +
  ylab("Concentration [\u00b5g/kg]") +
  xlab("Year | (Length, Fat and LSI)") +

    facet_grid2(species ~ factor(variable, levels = c(
    "PCB7",
    "DDT",
    "HCH",
    "HCB",
    "TNC",
    "PBDE"
  )), scales = "free_y", independent = "y")

Regression tables



# Cod
Cod_reg_tab <- rbind(
  p_man %>%
    filter(species == "Cod") %>%
    ungroup() %>%
    select(variable, model, tidy, glance, augment) %>%
    unnest(tidy) %>%
    select(-model, -glance, -augment) %>%
    select(-std.error, -statistic) %>%
    pivot_longer(cols = c(estimate, p.value), names_to = "property") %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    mutate(
      term = str_replace_all(term, c(
        "year" = "Year",
        "length_cm" = "Length",
        "fat_pc" = "Fat %",
        "lsi_pc" = "LSI %"
      ))
    ) %>%
    filter(!str_detect(term, "tercept")) %>%
    ungroup(),
  p_man %>%
    filter(species == "Cod") %>%
    ungroup() %>%
    select(variable, model, tidy, glance, augment) %>%
    unnest(glance) %>%
    select(-model, -tidy, -augment) %>%
    mutate(term = "Model") %>%
    select(variable, term, adj.r.squared, statistic, p.value, nobs) %>%
    rename(n_obs = nobs, F.statistic = statistic) %>%
    pivot_longer(cols = -c(variable, term), names_to = "property") %>%
    pivot_wider(names_from = "variable", values_from = "value")
) %>%
  gt(
    caption = "Regression summary table for Cod, bold indicating non-significant coefficients.",
    groupname_col = "term",
    rowname_col = "property"
  ) %>%
  fmt_number(
    columns = matches("[A-Z]{2}"),
    decimals = 3
  ) %>%
  fmt_number(
    columns = matches("[A-Z]{2}"),
    rows = property == "n_obs",
    decimals = 0
  ) %>%
  fmt_number(
    columns = matches("[A-Z]{2}"),
    rows = property == "F.statistic",
    decimals = 0
  ) %>%
  tab_style(
    location = cells_body(
      columns = PCB7,
      rows = property == "p.value" & (PCB7 > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = DDT,
      rows = property == "p.value" & (DDT > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = HCH,
      rows = property == "p.value" & (HCH > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = HCB,
      rows = property == "p.value" & (HCB > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = TNC,
      rows = property == "p.value" & (TNC > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = PBDE,
      rows = property == "p.value" & (PBDE > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels(
      columns = everything()
    )
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_row_groups(groups = everything())
  ) %>% tab_header("Cod")
  Cod_reg_tab
Table 4: Regression summary table for Cod, bold indicating non-significant coefficients.
Cod
PCB7 DDT HCH HCB TNC PBDE
Year
estimate −0.007 −0.007 −0.004 0.005 −0.003 −0.031
p.value 0.039 0.055 0.017 0.020 0.394 0.000
Length
estimate 0.787 1.225 −0.012 0.296 0.933 0.725
p.value 0.000 0.000 0.831 0.000 0.000 0.000
Fat %
estimate 0.004 0.006 0.007 0.012 0.006 0.005
p.value 0.000 0.000 0.000 0.000 0.000 0.000
LSI %
estimate −0.043 −0.049 0.006 −0.038 −0.051 −0.051
p.value 0.000 0.000 0.206 0.000 0.000 0.001
Model
adj.r.squared 0.228 0.399 0.662 0.715 0.303 0.307
F.statistic 20 43 126 161 26 24
p.value 0.000 0.000 0.000 0.000 0.000 0.000
n_obs 256 256 256 256 232 206
  gtsave(Cod_reg_tab, "Cod_reg_tab.docx")
  
# Haddock
Haddock_reg_tab <- rbind(
  p_man %>%
    filter(species == "Haddock") %>%
    ungroup() %>%
    select(variable, model, tidy, glance, augment) %>%
    unnest(tidy) %>%
    select(-model, -glance, -augment) %>%
    select(-std.error, -statistic) %>%
    pivot_longer(cols = c(estimate, p.value), names_to = "property") %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    mutate(
      term = str_replace_all(term, c(
        "year" = "Year",
        "length_cm" = "Length",
        "fat_pc" = "Fat %",
        "lsi_pc" = "LSI %"
      ))
    ) %>%
    filter(!str_detect(term, "tercept")) %>%
    ungroup(),
  p_man %>%
    filter(species == "Haddock") %>%
    ungroup() %>%
    select(variable, model, tidy, glance, augment) %>%
    unnest(glance) %>%
    select(-model, -tidy, -augment) %>%
    mutate(term = "Model") %>%
    select(variable, term, adj.r.squared, statistic, p.value, nobs) %>%
    rename(n_obs = nobs, F.statistic = statistic) %>%
    pivot_longer(cols = -c(variable, term), names_to = "property") %>%
    pivot_wider(names_from = "variable", values_from = "value")
) %>%
  gt(
    caption = "Regression summary table for Haddock, bold indicating non-significant coefficients.",
    groupname_col = "term",
    rowname_col = "property"
  ) %>%
  fmt_number(
    columns = matches("[A-Z]{2}"),
    decimals = 3
  ) %>%
  fmt_number(
    columns = matches("[A-Z]{2}"),
    rows = property == "n_obs",
    decimals = 0
  ) %>%
  fmt_number(
    columns = matches("[A-Z]{2}"),
    rows = property == "F.statistic",
    decimals = 0
  ) %>%
  tab_style(
    location = cells_body(
      columns = PCB7,
      rows = property == "p.value" & (PCB7 > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = DDT,
      rows = property == "p.value" & (DDT > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = HCH,
      rows = property == "p.value" & (HCH > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = HCB,
      rows = property == "p.value" & (HCB > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = TNC,
      rows = property == "p.value" & (TNC > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = PBDE,
      rows = property == "p.value" & (PBDE > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels(
      columns = everything()
    )
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_row_groups(groups = everything())
  ) %>% tab_header("Haddock")
  Haddock_reg_tab
Table 5: Regression summary table for Haddock, bold indicating non-significant coefficients.
Haddock
PCB7 DDT HCH HCB TNC PBDE
Year
estimate −0.031 −0.039 −0.012 0.008 −0.022 −0.060
p.value 0.000 0.000 0.000 0.000 0.000 0.000
Length
estimate 1.756 0.653 0.058 0.051 1.067 1.892
p.value 0.000 0.030 0.346 0.573 0.000 0.000
Fat %
estimate 0.002 0.007 0.004 0.008 0.005 −0.001
p.value 0.170 0.000 0.000 0.000 0.000 0.741
LSI %
estimate −0.093 −0.079 0.015 −0.023 −0.066 0.058
p.value 0.000 0.000 0.000 0.000 0.000 0.000
Model
adj.r.squared 0.370 0.209 0.716 0.498 0.201 0.380
F.statistic 50 23 211 83 22 44
p.value 0.000 0.000 0.000 0.000 0.000 0.000
n_obs 334 334 334 334 334 279
  gtsave(Haddock_reg_tab, "Haddock_reg_tab.docx")
  
 
 # Saithe
Saithe_reg_tab <- rbind(
  p_man %>%
    filter(species == "Saithe") %>%
    ungroup() %>%
    select(variable, model, tidy, glance, augment) %>%
    unnest(tidy) %>%
    select(-model, -glance, -augment) %>%
    select(-std.error, -statistic) %>%
    pivot_longer(cols = c(estimate, p.value), names_to = "property") %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    mutate(
      term = str_replace_all(term, c(
        "year" = "Year",
        "length_cm" = "Length",
        "fat_pc" = "Fat %",
        "lsi_pc" = "LSI %"
      ))
    ) %>%
    filter(!str_detect(term, "tercept")) %>%
    ungroup(),
  p_man %>%
    filter(species == "Saithe") %>%
    ungroup() %>%
    select(variable, model, tidy, glance, augment) %>%
    unnest(glance) %>%
    select(-model, -tidy, -augment) %>%
    mutate(term = "Model") %>%
    select(variable, term, adj.r.squared, statistic, p.value, nobs) %>%
    rename(n_obs = nobs, F.statistic = statistic) %>%
    pivot_longer(cols = -c(variable, term), names_to = "property") %>%
    pivot_wider(names_from = "variable", values_from = "value")
) %>%
  gt(
    caption = "Regression summary table for Saithe, bold indicating non-significant coefficients.",
    groupname_col = "term",
    rowname_col = "property"
  ) %>%
  fmt_number(
    columns = matches("[A-Z]{2}"),
    decimals = 3
  ) %>%
  fmt_number(
    columns = matches("[A-Z]{2}"),
    rows = property == "n_obs",
    decimals = 0
  ) %>%
  fmt_number(
    columns = matches("[A-Z]{2}"),
    rows = property == "F.statistic",
    decimals = 0
  ) %>%
  tab_style(
    location = cells_body(
      columns = PCB7,
      rows = property == "p.value" & (PCB7 > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = DDT,
      rows = property == "p.value" & (DDT > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = HCH,
      rows = property == "p.value" & (HCH > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = HCB,
      rows = property == "p.value" & (HCB > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = TNC,
      rows = property == "p.value" & (TNC > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    location = cells_body(
      columns = PBDE,
      rows = property == "p.value" & (PBDE > 0.05)
    ),
    style = list(cell_text(weight = "bold"))
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels(
      columns = everything()
    )
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_row_groups(groups = everything())
  ) %>% tab_header("Saithe")
  Saithe_reg_tab
Table 6: Regression summary table for Saithe, bold indicating non-significant coefficients.
Saithe
PCB7 DDT HCH HCB TNC PBDE
Year
estimate −0.013 −0.020 −0.012 −0.003 −0.016 −0.075
p.value 0.000 0.000 0.000 0.035 0.000 0.000
Length
estimate 1.555 1.565 0.045 0.381 1.619 1.902
p.value 0.000 0.000 0.367 0.000 0.000 0.000
Fat %
estimate −0.002 −0.002 0.005 0.006 −0.002 0.000
p.value 0.048 0.041 0.000 0.000 0.047 0.959
LSI %
estimate −0.012 −0.010 −0.001 −0.015 −0.014 −0.016
p.value 0.019 0.070 0.544 0.000 0.007 0.002
Model
adj.r.squared 0.306 0.345 0.644 0.301 0.342 0.785
F.statistic 34 40 134 33 39 180
p.value 0.000 0.000 0.000 0.000 0.000 0.000
n_obs 296 296 296 296 296 197
  gtsave(Saithe_reg_tab, "Saithe_reg_tab.docx")

Graphical abstract

# p_man %>%
#   unnest(tidy_coeff, partial_resids, data, normal_resids, augment) %>%
#   ungroup() %>%
#   ggplot(aes(year, (value_normal_resid + year * year_coeff + intercept_coeff + med_length_cm * length_cm_coeff + med_fat_pc * fat_pc_coeff + med_lsi_pc * lsi_pc_coeff))) +
#   geom_jitter(size = 0.3, na.rm = T, height = 0, width = 0.25) +
#   stat_smooth(method = "lm", na.rm = TRUE, se = FALSE, color = "red", size = 0.5) +
#   # conditional coloring percentage change
#   geom_text(
#    data = p_man %>% group_by(species, variable) %>%
#      distinct(year_estimate, .keep_all = TRUE),
#    color =
#      case_when(
#        p_man %>% group_by(species, variable) %>%
#          distinct(year_estimate) %>% pull(year_estimate) > 0 &
#          p_man %>%
#            group_by(species, variable) %>%
#            distinct(year_pval) %>%
#            pull(year_pval) < 0.05 ~ "red",
#        p_man %>% group_by(species, variable) %>%
#          distinct(year_estimate) %>% pull(year_estimate) < 0 &
#          p_man %>%
#            group_by(species, variable) %>%
#            distinct(year_pval) %>%
#            pull(year_pval) < 0.05 ~ "green",
#        TRUE ~ "black"
#      ),
#    fontface = "bold",
#    x = -Inf, y = Inf, hjust = -0.075, vjust = 1.5,
#    size = p_man %>% group_by(species, variable) %>%
#          distinct(year_estimate) %>% pull(year_estimate) %>% as.numeric() %>% abs()*150/14 + 2.3,
#    # size = 3.7,
#    aes(label = paste0(round(year_estimate %>% as.numeric() * 100, digits = 1) %>% number(style_positive = "plus", accuracy = 0.1), "%"))
#  )+
#   # p-values
#     geom_text(
#     data = p_man %>% group_by(species, variable) %>%
#       distinct(year_pval, .keep_all = TRUE), color = "grey30",
#     x = 2004.9, y = 0.1, hjust = 0, vjust = 0.6, size = 2.5,
#     aes(label = paste0("p = ", round(year_pval %>% as.numeric(), digits = 3) %>% format(nsmall = 2)))
#   ) +
# 
#   scale_x_continuous(breaks = c(2019, 2016, 2013, 2011, 2008, 2005, 2010)) +
#   scale_y_continuous(breaks = c(0, 0.5, 1, 1.5, 2, 2.5, 3), limits = c(0, 3.2)) +
#   ylab("log(Concentration + 1) [\u00b5g/kg]") +
#   xlab("Year | (Length, Fat and LSI)") +
#   
#   theme_bw() +
#   theme(
#     axis.text.x = element_text(angle = 90, hjust = 1),
#     legend.key = element_blank(),
#     strip.background = element_rect(colour="black", fill="white"),
#     strip.text.x = element_text(face = "bold"),
#     strip.text.y = element_text(face = "bold"))+
#   
#     facet_grid(species ~ factor(variable, levels = c(
#     "PCB7",
#     "DDT",
#     "HCH",
#     "HCB",
#     "TNC",
#     "PBDE"
#   )))
# 
# ggsave("graphical_abstr.svg", width = 11, height = 6)

Stomach content of haddock and cod

library(ggrepel)
library(see)
piedata <- read_excel("data/stomach_all_haddock_and_cod_all_stations_2011.xlsx", skip = 2) %>% 
  select(1, Haddock, Cod) %>%
  rename(c("Group" = 1)) %>%
  pivot_longer(cols = -Group, names_to = "Species") %>%
  group_by(Species) %>%
  mutate(value = value / sum(value)) %>% 
  mutate(
  csum = rev(cumsum(rev(value))),
  pos = value / 2 + lead(csum, 1),
  pos = if_else(is.na(pos), value / 2, pos),
  group_id = 15-row_number()
)

piedata %>%
  mutate(Group = fct_reorder(Group, group_id)) %>% 
  ggplot(aes(
    x = "",
    y = value,
    fill = Group
  )) +
  geom_col() +
  coord_polar(theta = "y") +
  # guides(fill = guide_legend(title = "Group")) +
  # theme_void() +
  facet_wrap(~Species)

ggsave("stommach_pie.svg", width = 9, height = 6)

Method description

Visualizations in the manuscript and statistical analyses for the determination of temporal trends were carried out in R version 4.2.1. Data with missing dependent or response variables were removed, and response variables were added one prior to log-transformation with base 10 to meet assumptions of homoscedasticity and normality while preserving zero-valued observations. The dependent variable length was also log-transformed. To correct for known confounders, a multiple linear regression model with the dependent variables year, fish length, liver fat content and liver somatic index (LSI) was fitted to the cleaned and transformed data for each POP and species. To visualize the effect of year on the POP concentrations corrected for the other dependent variables, the conditional partial residuals versus year were plotted, keeping length, fat and LSI constant at their median values. The significance of the regression coefficient for the year was determined using a Wald test to test the null hypothesis of no time trend. As detailed in the Supplementary Materials, model validation revealed some deviations from the model assumptions. Thus, marginally significant effects were interpreted with caution. More details regarding the statistical model are given in Supplementary Materials.

References